Quality control
Contents
# import some utilities
# definition of some plotting tools
%matplotlib inline
from bokeh.layouts import gridplot
from bokeh.layouts import layout
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import ColumnDataSource
from bokeh.plotting import output_file
from bokeh.palettes import Category10
from bokeh.io import export_png
from bokeh.models import Legend
from bokeh.models import Range1d
from bokeh.models import ColorBar
from bokeh.models import LinearAxis
from bokeh.models import DatetimeTickFormatter
# select a palette
#from bokeh.palettes import Dark2_25 as palette
from bokeh.palettes import Spectral11, Category20b
from bokeh.palettes import Spectral6
from bokeh.plotting import output_file
from bokeh.transform import linear_cmap
import itertools
import datetime
import itertools
import numpy
import os
import re
import sys
from netCDF4 import Dataset
import pandas
import logging
import math
def color_gen():
yield from itertools.cycle(Category10[10])
color = color_gen()
from bokeh.resources import INLINE
output_notebook(INLINE)
export = False
import logging
import sys
sys.path.append(os.path.join(os.environ["HOME"],"Projects","SeaFlect","python"))
from DataPoolAccessModule import AeolusDataClass
from DataPoolAccessModule import QualityControlClass
from SurfaceReflectanceModule import SurfaceReflectanceModelClass
def make_plot(title, hist, edges):
p = figure(title=title, tools='', background_fill_color="#fafafa")
p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
fill_color="navy", line_color="white", alpha=0.5)
p.y_range.start = 0
#p.legend.location = "center_right"
#p.legend.background_fill_color = "#fefefe"
p.xaxis.axis_label = 'x'
p.yaxis.axis_label = 'Pr(x)'
p.grid.grid_line_color="white"
return p
verbose_level = 'info'
logger=logging.getLogger()
logging.basicConfig(level=getattr(logging, verbose_level.upper()))
Cm = {}
Li = {}
wind_speed = numpy.linspace(2, 24, 23)
theory=SurfaceReflectanceModelClass(logger=logger)
theory.Ro_fixed=0.08
Li["low"] = numpy.array([theory.surface_reflectance(model="li",u=key, theta=33) for key in wind_speed]).transpose()
Cm["low"] = numpy.array([theory.surface_reflectance(model="concept",u=key, theta=33) for key in wind_speed]).transpose()
theory.Ro_fixed=0.016
Li["high"] = numpy.array([theory.surface_reflectance(model="li",u=key, theta=33) for key in wind_speed]).transpose()
Cm["high"] = numpy.array([theory.surface_reflectance(model="concept",u=key, theta=33) for key in wind_speed]).transpose()
AeolusProductObject = AeolusDataClass(debug=False, \
generate=False,
logger=logger)
exp = "fixed_range"
#exp = "flexible_range"
AeolusProductObject.pickle_data_file=os.path.join("SeaFlectProjectData","DataPool","seaflect_data_"+exp + ".pkl")
figure_path = os.path.join("project_figures", "notebook", exp)
if not os.path.exists(figure_path):
os.makedirs(figure_path)
[data_pool, available_periods, available_regions] = AeolusProductObject.get_data()
AeolusProductObject.data_pool=data_pool
# now that we have a consistent dataset we can make an annual
AeolusProductObject._create_annual_(periods=available_periods)
# and globa
AeolusProductObject._create_global_(regions=available_regions)
#definitions of range bins accoring to c-convention
AeolusProductObject.snr_up=20
AeolusProductObject.snr_low=10
range_bin={"top":0, "atmosphere":3, "surface":4, "ocean":5, "background":6}
Quality controlΒΆ
Results presented so far are without any filtering of the data. Hence the observations are collected also over field of view which contains clouds or aerosols. Initial experience with the raw data indicated that a robust quality control is needed.
The quality control is based on a number of parameters, the so-called scattering ratio provided in the L1B product datatream, the SNR ratio, the solar altitude and the standard deviation of the mean ACCD counts. To recall here we consider observations which are averaged over an extended geographical domain of about 87 km. Besides the mean signal also the standard deviation is available as a product. It is used to filter those data elements which are prepared over inhomogeneous scenes, i.e. scenes for which the standard deviation of the mean signal is above an upper or below a lower threshold.
If the sun is below the horizon we expect the background signal to be small. Although the signals are corrected for the background signal, it is hoped that the data collected during βnightβ are of better quality than during βdayβ. For this the solar altitude (\(\phi_{alt}\)) standard threshold of -7 can use used or the astronomical twilight threshold of -16 degrees.
The scattering ratio is derived by the level 2 A processor and gives an indication of the fraction of molecular over sum of partical and molecular scattering. If there are no particles within the scene then this scattering ratio should be 1. For the data on the course spatial resolution, this value has not yet been found. Here a value of 1.9 has been adopted.
The signal to noise ratio is a final quality control check. Currently only the snr of the surface range bin is used to determine if the data should be rejected for further processing. A lower threshold of 10 and upper threshold of 20 is used.
in summary the following thresholds are used
Parameter |
Value |
|---|---|
\(\Upsilon_{rms}\) |
5 |
\(\phi_{alt}\): standard |
-7 |
\(\phi_{alt}\): astronomical |
-16 |
SCAT |
1.9 |
SNR\(_{low}\) |
10 |
SNR\(_{high}\) |
20 |
The values for all field of views of selected parameters are shown in the next figures
SNRΒΆ
region="E"
graphic=[]
period="AnnualCycle"
dataset=data_pool[period][region]
time = dataset["L1Bmie"]["date"]
p=figure(x_axis_type="datetime", plot_width=800, plot_height=150, \
title="SNR {} {}".format(period, region),\
tools="box_zoom, pan,reset,save,wheel_zoom")
#basic signal
p.circle(time, dataset["L1Bmie"]["snr"][:,range_bin["atmosphere"]], \
color="red", legend_label="atmosphere")
p.yaxis.axis_label = 'SNR []'
graphic.append(p)
p=figure(x_axis_type="datetime", plot_width=800, plot_height=150, \
title="SNR {} {}".format(period, region),\
tools="box_zoom, pan,reset,save,wheel_zoom")
#basic signal
p.circle(time, dataset["L1Bmie"]["snr"][:,range_bin["surface"]], \
color="green", legend_label="surface")
p.yaxis.axis_label = 'SNR []'
graphic.append(p)
p=figure(x_axis_type="datetime", plot_width=800, plot_height=150, \
title="SNR {} {}".format(period, region),\
tools="box_zoom, pan,reset,save,wheel_zoom")
p.circle(time, dataset["L1Bmie"]["snr"][:,range_bin["ocean"]],\
color="blue", legend_label="ocean")
p.yaxis.axis_label = 'SNR []'
graphic.append(p)
plot = gridplot(graphic, ncols=1, width=800, height=300, toolbar_location="right")
show(plot)
if export:
figure_name = os.path.join(figure_path,"snr_{}_{}.png".format(period, region))
export_png(plot, filename=figure_name)
Scattering RatioΒΆ
region="E"
graphic=[]
period="AnnualCycle"
dataset=data_pool[period][region]
time = dataset["L1Bmie"]["date"]
p=figure(x_axis_type="datetime", plot_width=800, plot_height=150, \
title="Scattering ratio {} {}".format(period, region),\
tools="box_zoom, pan,reset,save,wheel_zoom")
#basic signal
p.circle(time, dataset["L1Bmie"]["scat"][:,range_bin["atmosphere"]], \
color="red", legend_label="atmosphere")
p.yaxis.axis_label = 'scat []'
p.y_range = Range1d(-2, 10)
graphic.append(p)
p=figure(x_axis_type="datetime", plot_width=800, plot_height=150, \
title="Scattering ratio {} {}".format(period, region),\
tools="box_zoom, pan,reset,save,wheel_zoom")
#basic signal
p.circle(time, dataset["L1Bmie"]["scat"][:,range_bin["surface"]], \
color="green", legend_label="surface")
p.yaxis.axis_label = 'scat []'
p.y_range = Range1d(-2, 10)
graphic.append(p)
p=figure(x_axis_type="datetime", plot_width=800, plot_height=150, \
title="Scattering ratio {} {}".format(period, region),\
tools="box_zoom, pan,reset,save,wheel_zoom")
p.circle(time, dataset["L1Bmie"]["scat"][:,range_bin["ocean"]],\
color="blue", legend_label="ocean")
p.yaxis.axis_label = 'scat []'
p.y_range = Range1d(-2, 10)
graphic.append(p)
plot = gridplot(graphic, ncols=1, width=800, height=300, toolbar_location="right")
show(plot)
if export:
figure_name = os.path.join(figure_path,"scat_{}_{}.png".format(period, region))
export_png(plot, filename=figure_name)
Standard deviationΒΆ
region="E"
graphic=[]
period="AnnualCycle"
dataset=data_pool[period][region]
time = dataset["L1Bmie"]["date"]
p=figure(x_axis_type="datetime", plot_width=800, plot_height=150, \
title="Standard Deviation {} {}".format(period, region),\
tools="box_zoom, pan,reset,save,wheel_zoom")
#basic signal
p.circle(time, dataset["L1Bmie"]["sgnlstdv"][:,range_bin["atmosphere"]], \
color="red", legend_label="atmosphere")
p.yaxis.axis_label = 'std []'
p.y_range = Range1d(-2, 20)
graphic.append(p)
p=figure(x_axis_type="datetime", plot_width=800, plot_height=150, \
title="Standard Deviation {} {}".format(period, region),\
tools="box_zoom, pan,reset,save,wheel_zoom")
#basic signal
p.circle(time, dataset["L1Bmie"]["sgnlstdv"][:,range_bin["surface"]], \
color="green", legend_label="surface")
p.yaxis.axis_label = 'std []'
p.y_range = Range1d(-10, 20)
graphic.append(p)
p=figure(x_axis_type="datetime", plot_width=800, plot_height=150, \
title="Standard Deviation {} {}".format(period, region),\
tools="box_zoom, pan,reset,save,wheel_zoom")
p.circle(time, dataset["L1Bmie"]["sgnlstdv"][:,range_bin["ocean"]],\
color="blue", legend_label="ocean")
p.yaxis.axis_label = 'std []'
p.y_range = Range1d(-2, 20)
graphic.append(p)
plot = gridplot(graphic, ncols=1, width=800, height=300, toolbar_location="right")
show(plot)
if export:
figure_name = os.path.join(figure_path,"stddev_{}_{}.png".format(period, region))
export_png(plot, filename=figure_name)
Discussion on QC parametersΒΆ
ACCD checkΒΆ
The accd counts themselves should be positive. If they are negative, this could be result of the onground processing and could indicate the uncertainty of the signal itself. Maybe it should be considered to remove all ACCD counts if they are below a certain small positive value. But it is hoped that this is detected throught the SNR check. So for the time being the ACCD-check only filters observations with negative ACCD counts.
Scattering ratioΒΆ
There appears to be a lower limit of the scattering ratio. the interpretation could be that there are always some particles in the lower boundary which is causing the scattering ratio to be larger than 1.6 or 1.8. That is why an upper threshold of 1.9 is adopted to keep the observations which are not so affected by particle scattering
Standard deviationΒΆ
There appears a lot of observations which have a standard deviation equal zero or even negative. A zero standard deviation indicate that the observations which contribute to the mean value all have the same value. This could be a default value, indicating bad observations. The quality control also removes data points which have a zero standard deviation.
SNRΒΆ
The same holds for the snr values, there are numerous datapoints (maybe the same with a zero standard deviation) which have a zero SNR. This is meaningless and hence these are filtered and removed from the processing chain.
Result after FilteringΒΆ
The next figure shows the data point after specific filtering settings. The following table identifies the different settings
Parameter |
Applied filters |
|---|---|
elementary |
scattering ratio and snr test |
basic |
as elementary, but with positive signal |
rigorous |
as basic but with the standard deviation test |
night |
either basic or rigorous with the solar altitude below -7 |
test with a solar altitude below -16 leaves very little data points. That test has not been implemented.
The next figures show for region E the impact on the number of point for different filtering settings.
Case |
Number of data points |
|---|---|
Baseline (no filter) |
9813 |
elementary |
187 |
basic |
118 |
rigorous |
21 |
elementary_night |
45 |
basic_night |
30 |
rigorous_night |
6 |
From this experiment it can be seen that especially the standard deviation test is very efficient to remove data points from the processing chain. At this point it is not clear if indeed a standard deviation threshold of 5 counts is too rigorous or not. This is further analysed in subsequent sections.
# the qc has three levels basic, enhanced, rigorous
# have a look at the qc filtered
#
region="E"
period="AnnualCycle"
qc_level = "sgnl_scat_snr"
dataset = AeolusProductObject.data_pool[period][region]
# print ("{}".format(len(dataset["L1Bmie"]["date"])))
qc = AeolusProductObject.apply_quality_control(level=qc_level, period=period, region=region)
graphic=[]
time = dataset["L1Bmie"]["date"][qc]
# print ("{}".format(len(dataset["L1Bmie"]["date"])))
s=figure(x_axis_type="datetime", width=800, height=150, \
tools="box_zoom,pan,crosshair, reset,save,wheel_zoom",\
title="ACCD {} {} {} {}".format(period, region, qc_level, len(qc)))
s.circle(time, dataset["L1Bmie"]["sgnl"][qc,range_bin["atmosphere"]], \
color="red", legend_label="atmosphere")
s.yaxis.axis_label = 'ACCD counts [-]'
s.legend.location = "top_left"
graphic.append(s)
p22=figure(x_axis_type="datetime", width=800, height=150, \
tools="box_zoom,pan,crosshair, reset,save,wheel_zoom",\
title="ACCD {} {} {}".format(period, region, qc_level))
p22.circle(time, dataset["L1Bmie"]["sgnl"][qc,range_bin["surface"]], \
color="green", legend_label="surface")
p22.yaxis.axis_label = 'ACCD counts [-]'
p22.legend.location = "top_left"
graphic.append(p22)
p23=figure(x_axis_type="datetime", width=800, height=150, \
tools="box_zoom,pan,crosshair, reset,save,wheel_zoom",\
title="ACCD {} {} {}".format(period, region, qc_level))
p23.circle(time, dataset["L1Bmie"]["sgnl"][qc,range_bin["ocean"]], \
color="blue", legend_label="ocean")
p23.yaxis.axis_label = 'ACCD counts [-]'
p23.legend.location = "top_left"
graphic.append(p23)
plot = gridplot(graphic, ncols=1, width=800, height=300,\
toolbar_location="right")
show(plot)
if export:
figure_name = os.path.join(figure_path,"{}_accd_time_serie_{}.png".format(qc_level, period))
export_png(plot, filename=figure_name)
# the qc has three levels basic, enhanced, rigorous
# have a look at the qc filtered
#
region="E"
period="AnnualCycle"
qc_level = "sgnl_scat_snr_stdev"
#
dataset =AeolusProductObject.data_pool[period][region]
qc = AeolusProductObject.apply_quality_control(level=qc_level, period=period, region=region)
graphic=[]
time = dataset["L1Bmie"]["date"][qc]
s=figure(x_axis_type="datetime", width=800, height=150, \
tools="box_zoom,pan,crosshair, reset,save,wheel_zoom",\
title="ACCD {} {} {} {}".format(period, region, qc_level, len(qc)))
s.circle(time, dataset["L1Bmie"]["sgnl"][qc,range_bin["atmosphere"]], \
color="red", legend_label="atmosphere")
s.yaxis.axis_label = 'ACCD counts [-]'
s.legend.location = "top_left"
graphic.append(s)
p22=figure(x_axis_type="datetime", width=800, height=150, \
tools="box_zoom,pan,crosshair, reset,save,wheel_zoom",\
title="ACCD {} {} {}".format(period, region, qc_level))
p22.circle(time, dataset["L1Bmie"]["sgnl"][qc,range_bin["surface"]], \
color="green", legend_label="surface")
p22.yaxis.axis_label = 'ACCD counts [-]'
p22.legend.location = "top_left"
graphic.append(p22)
p23=figure(x_axis_type="datetime", width=800, height=150, \
tools="box_zoom,pan,crosshair, reset,save,wheel_zoom",\
title="ACCD {} {} {}".format(period, region, qc_level))
p23.circle(time, dataset["L1Bmie"]["sgnl"][qc,range_bin["ocean"]], \
color="blue", legend_label="ocean")
p23.yaxis.axis_label = 'ACCD counts [-]'
p23.legend.location = "top_left"
graphic.append(p23)
plot = gridplot(graphic, ncols=1, width=800, height=300,\
toolbar_location="right")
show(plot)
if export:
figure_name = os.path.join(figure_path,"{}_accd_time_serie_{}.png".format(qc_level, period))
export_png(plot, filename=figure_name)
# the qc has three levels basic, enhanced, rigorous
# have a look at the qc filtered
#
region="E"
period="AnnualCycle"
qc_level = "sgnl_scat_snr_night"
#
dataset =AeolusProductObject.data_pool[period][region]
qc = AeolusProductObject.apply_quality_control(level=qc_level, period=period, region=region)
graphic=[]
time = dataset["L1Bmie"]["date"][qc]
s=figure(x_axis_type="datetime", width=800, height=150, \
tools="box_zoom,pan,crosshair, reset,save,wheel_zoom",\
title="ACCD {} {} {} {}".format(period, region, qc_level, len(qc)))
s.circle(time, dataset["L1Bmie"]["sgnl"][qc,range_bin["atmosphere"]], \
color="red", legend_label="atmosphere")
s.yaxis.axis_label = 'ACCD counts [-]'
s.legend.location = "top_left"
graphic.append(s)
p22=figure(x_axis_type="datetime", width=800, height=150, \
tools="box_zoom,pan,crosshair, reset,save,wheel_zoom",\
title="ACCD {} {} {}".format(period, region, qc_level))
p22.circle(time, dataset["L1Bmie"]["sgnl"][qc,range_bin["surface"]], \
color="green", legend_label="surface")
p22.yaxis.axis_label = 'ACCD counts [-]'
p22.legend.location = "top_left"
graphic.append(p22)
p23=figure(x_axis_type="datetime", width=800, height=150, \
tools="box_zoom,pan,crosshair, reset,save,wheel_zoom",\
title="ACCD {} {} {}".format(period, region, qc_level))
p23.circle(time, dataset["L1Bmie"]["sgnl"][qc,range_bin["ocean"]], \
color="blue", legend_label="ocean")
p23.yaxis.axis_label = 'ACCD counts [-]'
p23.legend.location = "top_left"
graphic.append(p23)
plot = gridplot(graphic, ncols=1, width=800, height=300,\
toolbar_location="right")
show(plot)
if export:
figure_name = os.path.join(figure_path,"{}_accd_time_serie_{}.png".format(qc_level, period))
export_png(plot, filename=figure_name)
# the qc has three levels basic, enhanced, rigorous
# have a look at the qc filtered
#
region="E"
period="AnnualCycle"
qc_level = "sgnl_scat_snr_stdev_night"
#
dataset =AeolusProductObject.data_pool[period][region]
qc = AeolusProductObject.apply_quality_control(level=qc_level, period=period, region=region)
graphic=[]
time = dataset["L1Bmie"]["date"][qc]
s=figure(x_axis_type="datetime", width=800, height=150, \
tools="box_zoom,pan,crosshair, reset,save,wheel_zoom",\
title="ACCD {} {} {} {}".format(period, region, qc_level, len(qc)))
s.circle(time, dataset["L1Bmie"]["sgnl"][qc,range_bin["atmosphere"]], \
color="red", legend_label="atmosphere")
s.yaxis.axis_label = 'ACCD counts [-]'
s.legend.location = "top_left"
graphic.append(s)
p22=figure(x_axis_type="datetime", width=800, height=150, \
tools="box_zoom,pan,crosshair, reset,save,wheel_zoom",\
title="ACCD {} {} {}".format(period, region, qc_level))
p22.circle(time, dataset["L1Bmie"]["sgnl"][qc,range_bin["surface"]], \
color="green", legend_label="surface")
p22.yaxis.axis_label = 'ACCD counts [-]'
p22.legend.location = "top_left"
graphic.append(p22)
p23=figure(x_axis_type="datetime", width=800, height=150, \
tools="box_zoom,pan,crosshair, reset,save,wheel_zoom",\
title="ACCD {} {} {}".format(period, region, qc_level))
p23.circle(time, dataset["L1Bmie"]["sgnl"][qc,range_bin["ocean"]], \
color="blue", legend_label="ocean")
p23.yaxis.axis_label = 'ACCD counts [-]'
p23.legend.location = "top_left"
graphic.append(p23)
plot = gridplot(graphic, ncols=1, width=800, height=300,\
toolbar_location="right")
show(plot)
if export:
figure_name = os.path.join(figure_path,"{}_accd_time_serie_{}.png".format(qc_level, period))
export_png(plot, filename=figure_name)
DiscussionΒΆ
There are two key qc parameters, namely the snr and the positive value of the signal